# SDGym Benchmark

In [1]:
import numpy as np
import pandas as pd
import sdgym
from echoflow import EchoFlow

def _echoflow_synthesize(real_data, categorical_columns, ordinal_columns, **model_kwargs):
    """Fit an EchoFlow model on ``real_data`` and sample as many rows back.

    Shared implementation behind ``EchoFlowSynthesizer`` and
    ``EchoFlowSynthesizeKDE``; the two differ only in the keyword arguments
    forwarded to ``EchoFlow``.

    Args:
        real_data: 2-D data accepted by ``pd.DataFrame``.
        categorical_columns: Iterable of column labels to treat as discrete.
        ordinal_columns: Iterable of column labels to treat as discrete.
        **model_kwargs: Extra keyword arguments passed to ``EchoFlow`` in
            addition to ``nb_epochs=100``.

    Returns:
        The sampled frame's ``.values`` array, with ``len(real_data)`` rows
        requested from the model.
    """
    # Coerce to lists first: a bare `+` raises for tuple + list and would add
    # element-wise if the indices arrive as numpy arrays.
    discrete_columns = list(categorical_columns) + list(ordinal_columns)

    real_df = pd.DataFrame(real_data)
    # Discrete columns are handed to the model as integer-valued strings --
    # presumably so EchoFlow treats them as categorical; TODO confirm.
    for column in discrete_columns:
        real_df[column] = real_df[column].astype(int).astype(str)

    model = EchoFlow(nb_epochs=100, **model_kwargs)
    model.fit(real_df)
    synthetic_df = model.sample(num_samples=len(real_df))

    # Undo the string encoding so the returned array is numeric again.
    for column in discrete_columns:
        synthetic_df[column] = synthetic_df[column].astype(int)

    return synthetic_df.values

def EchoFlowSynthesizer(real_data, categorical_columns, ordinal_columns):
    """SDGym synthesizer function: EchoFlow built with ``nb_epochs=100`` only."""
    return _echoflow_synthesize(real_data, categorical_columns, ordinal_columns)

def EchoFlowSynthesizeKDE(real_data, categorical_columns, ordinal_columns):
    """SDGym synthesizer function: EchoFlow built with ``nb_epochs=100, use_kde=True``."""
    return _echoflow_synthesize(
        real_data, categorical_columns, ordinal_columns, use_kde=True
    )

# Run the SDGym benchmark for both EchoFlow synthesizer functions on the
# 'ring', 'grid' and 'gridr' datasets with iterations=3.
scores = sdgym.run(
    datasets=['ring', 'grid', 'gridr'],
    iterations=3,
    synthesizers=[EchoFlowSynthesizer, EchoFlowSynthesizeKDE],
)

Epoch 10 | Train Loss -0.649
Epoch 20 | Train Loss -0.688
Epoch 30 | Train Loss -0.700
Epoch 40 | Train Loss -0.707
Epoch 50 | Train Loss -0.709
Epoch 60 | Train Loss -0.731
Epoch 70 | Train Loss -0.727
Epoch 80 | Train Loss -0.728
Epoch 90 | Train Loss -0.720
Epoch 100 | Train Loss -0.731
Epoch 10 | Train Loss -0.622
Epoch 20 | Train Loss -0.677
Epoch 30 | Train Loss -0.697
Epoch 40 | Train Loss -0.715
Epoch 50 | Train Loss -0.718
Epoch 60 | Train Loss -0.715
Epoch 70 | Train Loss -0.719
Epoch 80 | Train Loss -0.711
Epoch 90 | Train Loss -0.714
Epoch 100 | Train Loss -0.717
Epoch 10 | Train Loss -0.647
Epoch 20 | Train Loss -0.687
Epoch 30 | Train Loss -0.701
Epoch 40 | Train Loss -0.711
Epoch 50 | Train Loss -0.707
Epoch 60 | Train Loss -0.728
Epoch 70 | Train Loss -0.720
Epoch 80 | Train Loss -0.732
Epoch 90 | Train Loss -0.726
Epoch 100 | Train Loss -0.724
Epoch 10 | Train Loss 0.058
Epoch 20 | Train Loss 0.023
Epoch 30 | Train Loss -0.052
Epoch 40 | Train Loss -0.045
Epoch 50 | Tr

In [2]:
# Display only the score rows labelled CTGAN and the two EchoFlow synthesizers.
# NOTE(review): CTGAN is not passed to sdgym.run in this notebook, so its row
# presumably comes from results SDGym ships with -- confirm.
scores.loc[["CTGAN", "EchoFlowSynthesizer", "EchoFlowSynthesizeKDE"]]

Unnamed: 0,grid/syn_likelihood,grid/test_likelihood,gridr/syn_likelihood,gridr/test_likelihood,ring/syn_likelihood,ring/test_likelihood,timestamp
CTGAN,-8.760635,-5.062972,-8.30975,-5.04831,-6.591324,-2.665281,2020-10-17 09:46:54.494331
EchoFlowSynthesizer,-6.71223,-4.437056,-6.496902,-4.475942,-1.932969,-1.796832,2020-12-30 23:10:22.816115
EchoFlowSynthesizeKDE,-5.402527,-4.063265,-5.531003,-4.154107,-2.27748,-1.842371,2020-12-30 23:10:22.816115
